#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import time
import sys
from pybloom_live import ScalableBloomFilter

# Python 2-only preamble: `reload` is a builtin only on Python 2 and
# `sys.setdefaultencoding` does not exist on Python 3, so this script will not
# run unmodified there. `sys` is reloaded first because the attribute is not
# available on the already-imported module at this point (Python 2 startup
# removes it). The call makes UTF-8 the interpreter-wide default for implicit
# str <-> unicode conversions.
reload(sys)
sys.setdefaultencoding('utf-8')



# Scan a JSON-lines file and collect every distinct, truthy value found under
# the 'recruitingSource' key of each record. The accumulated set is logged to
# 'set_file' as a list: one progress snapshot every `snapshot_every` input
# lines, and one final snapshot after the last line has been read.
#
# Raises whatever `open` raises if the input is missing, and whatever
# `json.loads` raises on a non-blank line that is not valid JSON.
file_name = 'recruiting_history_bak'
snapshot_every = 100000
index = 0  # 1-based number of the last line read; stays 0 for an empty file
source_set = set()
with open(file_name, 'r') as f, open('set_file', 'w') as log_f:
    # `index` must advance with every line read; if it stays constant the
    # modulo check below is always true and the entire set gets re-dumped
    # after each input line (quadratic output).
    for index, line in enumerate(f, 1):
        line = line.strip()
        if line:  # blank lines are skipped but still counted
            dic = json.loads(line)
            biddingSource = dic.get('recruitingSource')
            if biddingSource:
                source_set.add(biddingSource)

        # Periodic progress snapshot.
        if index % snapshot_every == 0:
            log_f.write('{}\n\n'.format(list(source_set)))

    # Final snapshot; always written, even when the input file is empty.
    log_f.write('{}\n\n'.format(list(source_set)))
